!> Main program for the CG method.
program cg_main

 use mod_utils, only: &
   t_realtime, my_second

 use mod_messages, only: &
   mod_messages_constructor, &
   mod_messages_destructor,  &
   mod_messages_initialized, &
   error,   &
   warning, &
   info

 !$ use mod_omp_utils, only: &
 !$   mod_omp_utils_constructor, &
 !$   mod_omp_utils_destructor,  &
 !$   mod_omp_utils_initialized, &
 !$   detailed_timing_omp, &
 !$   omput_push_key,      &
 !$   omput_pop_key,       &
 !$   omput_start_timer,   &
 !$   omput_close_timer,   &
 !$   omput_write_time

 use mod_kinds, only: &
   mod_kinds_constructor, &
   mod_kinds_destructor,  &
   mod_kinds_initialized, &
   wp

 use mod_mpi_utils, only: &
   mod_mpi_utils_constructor, &
   mod_mpi_utils_destructor,  &
   mod_mpi_utils_initialized, &
   mpi_comm_world, &
   mpi_init, mpi_finalize, &
   mpi_comm_size, mpi_comm_rank

 use mod_fu_manager, only: &
   mod_fu_manager_constructor, &
   mod_fu_manager_destructor,  &
   mod_fu_manager_initialized, &
   new_file_unit

 use mod_octave_io, only: &
   mod_octave_io_constructor, &
   mod_octave_io_destructor,  &
   mod_octave_io_initialized, &
   write_octave, &
   read_octave

 use mod_sympoly, only: &
   mod_sympoly_constructor, &
   mod_sympoly_destructor,  &
   mod_sympoly_initialized, &
   show

 use mod_octave_io_sympoly, only: &
   mod_octave_io_sympoly_constructor, &
   mod_octave_io_sympoly_destructor,  &
   mod_octave_io_sympoly_initialized, &
   write_octave

 use mod_linal, only: &
   mod_linal_constructor, &
   mod_linal_destructor,  &
   mod_linal_initialized, &
   invmat_nop

 use mod_perms, only: &
   mod_perms_constructor, &
   mod_perms_destructor,  &
   mod_perms_initialized

 use mod_octave_io_perms, only: &
   mod_octave_io_perms_constructor, &
   mod_octave_io_perms_destructor,  &
   mod_octave_io_perms_initialized

 use mod_sparse, only: &
   mod_sparse_constructor, &
   mod_sparse_destructor,  &
   mod_sparse_initialized, &
   ! sparse types
   t_col,       &
   t_tri,       &
   ! construction of new objects
   new_col,     &
   new_tri,     &
   ! convertions
   col2tri,     &
   tri2col,     &
   ! overloaded operators
   operator(+), &
   operator(*), &
   sum,         &
   transpose,   &
   matmul,      &
   clear

 use mod_octave_io_sparse, only: &
   mod_octave_io_sparse_constructor, &
   mod_octave_io_sparse_destructor,  &
   mod_octave_io_sparse_initialized, &
   write_octave

 use mod_linsolver, only: &
   mod_linsolver_constructor, &
   mod_linsolver_destructor,  &
   !1) Parameters which should be set before using the module:
   t_nsolver_general_parms, linsolver_general_parms, &
   t_nsolver_umfpack_parms, linsolver_umfpack_parms, &
   t_nsolver_mumps_parms,   linsolver_mumps_parms,   &
   t_nsolver_itersol_parms, linsolver_itersol_parms, &
   !2) Main module functions
   factor, solve, clean

 use mod_output_control, only: &
   mod_output_control_constructor, &
   mod_output_control_destructor,  &
   mod_output_control_initialized, &
   elapsed_format, &
   base_name

 use mod_master_el, only: &
   mod_master_el_constructor, &
   mod_master_el_destructor,  &
   mod_master_el_initialized, &
   t_lagnodes, lag_nodes, &
   write_octave

 use mod_grid, only: &
   mod_grid_constructor, &
   mod_grid_destructor,  &
   mod_grid_initialized, &
   t_grid, t_ddc_grid,   &
   new_grid, clear,      &
   write_octave

 use mod_numquad, only: &
   mod_numquad_constructor, &
   mod_numquad_destructor,  &
   mod_numquad_initialized

 use mod_cgdofs, only: &
   mod_cgdofs_constructor, &
   mod_cgdofs_destructor,  &
   mod_cgdofs_initialized, &
   t_cgdofs, t_ddc_cgdofs, &
   new_cgdofs,           &
   clear,                &
   write_octave

 use mod_base, only: &
   mod_base_constructor, &
   mod_base_destructor,  &
   mod_base_initialized, &
   t_base, clear, &
   write_octave

 use mod_bcs, only: &
   mod_bcs_constructor, &
   mod_bcs_destructor,  &
   mod_bcs_initialized, &
   b_dir,                 &
   t_bcs, new_bcs, clear, &
   write_octave

 use mod_fe_spaces, only: &
   mod_fe_spaces_constructor, &
   mod_fe_spaces_destructor,  &
   mod_fe_spaces_initialized, &
   cg_base => cg ! scalar f.e. space

 use mod_lhs, only: &
   mod_lhs_constructor, &
   mod_lhs_destructor,  &
   mod_lhs_initialized, &
   lhs_setup,      &
   lhs_clean,      &
   lhs,            &
   precon_diag,    &
   precon_tridiag, &
   precon_multidiag

 use mod_testcases, only: &
   mod_testcases_constructor, &
   mod_testcases_destructor,  &
   mod_testcases_initialized, &
   test_name,   &
   test_description,&
   coeff_diff,  &
   coeff_adv,   &
   coeff_dir,   &
   coeff_lam,   &
   coeff_q

 use mod_cg_locmat, only: &
   mod_cg_locmat_constructor, &
   mod_cg_locmat_destructor,  &
   mod_cg_locmat_initialized, &
   cg_locmat, &
   t_bcs_error

 use mod_error_norms, only: &
   mod_error_norms_constructor, &
   mod_error_norms_destructor,  &
   mod_error_norms_initialized, &
   primal_err, &
   primal_flux_err

!-----------------------------------------------------------------------
 
 implicit none

!-----------------------------------------------------------------------
 
 ! Main parameters
 character(len=*), parameter :: this_prog_name = 'cg'
 integer, parameter :: max_char_len = 1000

 ! FE space
 integer :: k
 type(t_base) :: base
 type(t_lagnodes), allocatable :: ref_nodes(:)
 type(t_cgdofs    ) :: udofs
 type(t_ddc_cgdofs) :: ddc_udofs

 ! Grid and bcs
 integer :: nbound
 character(len=max_char_len) :: grid_file, cbc_type
 type(t_grid    ), target :: grid
 type(t_ddc_grid)         :: ddc_grid
 type(t_bcs), target :: bcs
 integer, allocatable :: bc_type_t(:,:)

 ! Test case
 character(len=max_char_len) :: testname

 ! Local matrixes
 logical :: stat_cnd ! perform static condensation
 integer :: pk, pkc
 integer :: ie, il, jl, pos
 real(wp), allocatable :: aak(:,:) , fk(:), aakc(:,:), fkc(:), &
   aak22i(:,:)
 real(wp), allocatable :: uuk(:,:,:), tk(:,:), uuuc(:)

 ! Global matrix
 integer :: nnz_mmm11, ndofsc, gn
 integer, allocatable :: mmmi(:), mmmj(:)
 real(wp), allocatable :: mmmx(:), fff(:), fff1(:), fff2(:), &
  uuu(:), uuu1(:), rhs(:)
 type(t_col) :: mmm, mmm11, mmm12, mmm21, mmm22

 ! bcs variables
 integer :: i, ndofs_dir, ndofs_nat
 integer, allocatable :: dofs_nat(:), dofs_dir(:), gdofs_nat(:)
 real(wp), allocatable :: uuu2(:)

 ! Linear system solver
 logical :: itsol_abstol
 integer :: umfpack_print_level, precon_ndiags
 real(wp) :: itsol_toll
 character(len=max_char_len) :: linsolver, itsolver, &
   preconditioner

 ! MPI variables
 integer :: mpi_nd, mpi_id

 ! Error computations
 logical :: compute_error_norms
 integer :: err_extra_deg
 real(wp) :: e_u_l2, e_flux_l2
 real(wp), allocatable :: uuu_dg(:)

 ! IO variables
 character(len=*), parameter :: input_file_name_def = 'cg.in'
 character(len=max_char_len) :: input_file_name
 character(len=max_char_len) :: basename
 character(len=*), parameter :: out_file_nml_suff = '-nml.octxt'
 character(len=10000+len(out_file_nml_suff)) :: out_file_nml_name
 logical :: write_grid
 character(len=*), parameter :: out_file_grid_suff = '-grid.octxt'
 character(len=10000+len(out_file_grid_suff)) :: out_file_grid_name
 character(len=*), parameter :: out_file_base_suff = '-base.octxt'
 character(len=10000+len(out_file_base_suff)) :: out_file_base_name
 logical :: write_sys
 character(len=*), parameter :: out_file_results_suff = '-results.octxt'
 character(len=10000+len(out_file_results_suff)) :: out_file_results_name

 ! Auxiliary variables
 integer :: fu, ierr
 real(t_realtime) :: t00, t0, t1
 character(len=10*max_char_len) :: message(1)
 type(t_bcs_error) :: b_err

 ! Input namelist
 namelist /input/ &
   ! test case
   testname,    &
   ! output base name
   basename,    &
   ! base
   k,           &
   ! grid
   write_grid,  &
   grid_file,   &
   ! boundary conditions
   nbound, cbc_type, &
   ! linear system solver
   linsolver,   &
   ! UMFPACK control variables (if linsolver.eq."iterative")
   umfpack_print_level, &
   ! iterative solver control variables (if linsolver.eq."iterative")
   itsolver,                 &
   itsol_abstol, itsol_toll, &
   !  precon_ndiags can be larger than the size of the matrix, in
   !  which case the whole matrix is used as preconditioner
   preconditioner, precon_ndiags, &
   ! output details
   write_sys,   &
   ! errors
   compute_error_norms, err_extra_deg

 !----------------------------------------------------------------------
 ! Preliminary initializations
 t00 = my_second()
 call mod_messages_constructor()

 !$ if(detailed_timing_omp) call mod_omp_utils_constructor()

 call mod_kinds_constructor()

 call mpi_init(ierr)
 call mod_mpi_utils_constructor()
 call mpi_comm_size(mpi_comm_world,mpi_nd,ierr)
 call mpi_comm_rank(mpi_comm_world,mpi_id,ierr)

 call mod_fu_manager_constructor()

 call mod_octave_io_constructor()

 call mod_sympoly_constructor()
 call mod_octave_io_sympoly_constructor()

 call mod_linal_constructor()

 call mod_perms_constructor()
 call mod_octave_io_perms_constructor()

 call mod_sparse_constructor()
 call mod_octave_io_sparse_constructor()
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Read input file
 if(command_argument_count().gt.0) then
   call get_command_argument(1,value=input_file_name)
 else
   input_file_name = input_file_name_def
 endif

 call new_file_unit(fu,ierr)
 open(fu,file=trim(input_file_name), &
      status='old',action='read',form='formatted',iostat=ierr)
 read(fu,input)
 close(fu,iostat=ierr)
 ! If there are more processes, each of them needs its own basename
 ! and grid
 if(mpi_nd.gt.1) then
   write(basename, '(a,a,i3.3)') trim(basename),'-P',mpi_id
   write(grid_file,'(a,a,i3.3)') trim(grid_file),'.',mpi_id
 endif
 ! echo the input namelist
 out_file_nml_name = trim(basename)//out_file_nml_suff
 open(fu,file=trim(out_file_nml_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
 write(fu,input)
 close(fu,iostat=ierr)

 allocate(bc_type_t(nbound,2))
 read(cbc_type,*) bc_type_t ! transposed, for simplicity
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! We can now initialize the linsolver constructor on ALL the
 ! processes and set the solver parameters (in fact, such parameters
 ! will not change during the entire execution).
 call mod_linsolver_constructor(trim(linsolver))
 linsolver_general_parms%transposed_mat = .false.
 linsolver_umfpack_parms%umfpack_print_level = &
         umfpack_print_level
 linsolver_mumps_parms%distributed      = .true.
 linsolver_itersol_parms%itsolver       = itsolver
 linsolver_itersol_parms%itsol_toll     = itsol_toll
 linsolver_itersol_parms%itsol_abstol   = itsol_abstol
 linsolver_itersol_parms%preconditioner = preconditioner
 linsolver_itersol_parms%precon_ndiags  = precon_ndiags
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! output control setup
 call mod_output_control_constructor(basename)
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Construct the grid (without the continuous dofs, which will be
 ! computed later once the FE basis has been defined)
 t0 = my_second()

 call mod_master_el_constructor()

 call mod_grid_constructor()
 call new_grid(grid,trim(grid_file) , ddc_grid,mpi_comm_world)

 ! write the octave output
 if(write_grid) then
   out_file_grid_name = trim(base_name)//out_file_grid_suff
   call new_file_unit(fu,ierr)
   open(fu,file=trim(out_file_grid_name), &
        status='replace',action='write',form='formatted',iostat=ierr)
    call write_octave(grid    ,'grid'    ,fu)
    call write_octave(ddc_grid,'ddc_grid',fu)
   close(fu,iostat=ierr)
 endif

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed grid construction: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Collect the information concerning the boundary conditions
 call mod_bcs_constructor()
 call new_bcs(bcs,grid,transpose(bc_type_t) , ddc_grid,mpi_comm_world)
 deallocate( bc_type_t )

 if(write_grid) then
   call new_file_unit(fu,ierr)
   open(fu,file=trim(out_file_grid_name), status='old',action='write', &
        form='formatted',position='append',iostat=ierr)
    call write_octave(bcs,'bcs',fu)
   close(fu,iostat=ierr)
 endif
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Define the finite element space
 t0 = my_second()

 call mod_numquad_constructor()

 call mod_base_constructor()

 call mod_fe_spaces_constructor()

 call mod_cgdofs_constructor()

 ! nodal Lagrangian basis
 call lag_nodes(ref_nodes,grid%me%d,k)
 base = cg_base(grid%me,k,ref_nodes)
 call new_cgdofs( udofs,ref_nodes,grid,bcs , &
           mpi_comm_world,ddc_grid,ddc_udofs )

 ! write the octave output
 out_file_base_name = trim(base_name)//out_file_base_suff
 call new_file_unit(fu,ierr)
 open(fu,file=trim(out_file_base_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
   call write_octave(base     ,'base'     ,fu)
   call write_octave(ref_nodes,'ref_nodes',fu)
   if(write_grid) then
     call write_octave(udofs    ,'udofs'    ,fu)
     call write_octave(ddc_udofs,'ddc_udofs',fu)
   endif
 close(fu,iostat=ierr)

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed FE basis construction: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Test case setup
 call mod_testcases_constructor(testname,grid%m)
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Assemble matrix and right hand side
 call mod_cg_locmat_constructor()

 t0 = my_second()

 pk = base%pk
 stat_cnd = ref_nodes(grid%d)%nn.gt.0
 if(stat_cnd) then
   pkc = pk - ref_nodes(grid%d)%nn ! local dofs after static cnd.
   ndofsc = udofs%ndofs - udofs%ndofsd(grid%d)
   gn = ddc_udofs%ngnat_dofs - ddc_udofs%ngdofsd(grid%d)
 else
   pkc = pk
   ndofsc = udofs%ndofs
   gn = ddc_udofs%ngnat_dofs
 endif
 allocate(  aak(pk ,pk ) ,  fk(pk ) , &
           aakc(pkc,pkc) , fkc(pkc) , &
              aak22i(pk-pkc,pk-pkc) )
 allocate( uuk(pk-pkc,pkc,grid%ne) , tk(pk-pkc,grid%ne) )
 allocate( mmmi(grid%ne*pkc**2) , mmmj(grid%ne*pkc**2) )
 allocate( mmmx(grid%ne*pkc**2) , fff(ndofsc) )
 fff = 0.0_wp

 elem_loop: do ie=1,grid%ne

   !--------------------------------------------------------------------
   !1) compute local matrices
   call cg_locmat( aak , fk , base , grid%e(ie) , bcs%b_e2be(ie) ,b_err)

   if(b_err%lerr) &
     call error(this_prog_name,'',b_err%message)
   !--------------------------------------------------------------------

   !--------------------------------------------------------------------
   !2) static condensation (for k sufficiently large)
   if(stat_cnd) then
     ! A22i = inv(A22)
     call invmat_nop( aak(pkc+1:pk,pkc+1:pk) , aak22i )
     ! U = A22i*A21
     uuk(:,:,ie) = matmul( aak22i , aak(pkc+1:pk,1:pkc) )
     tk(:,ie) = matmul( aak22i , fk(pkc+1:pk) )
     ! Ac = A11 - A12*A22i*A21
     aakc = aak(1:pkc,1:pkc) - matmul(aak(1:pkc,pkc+1:pk),uuk(:,:,ie))
     ! bc = b1 - A12*A22i*b2
     fkc = fk(1:pkc) - matmul( aak(1:pkc,pkc+1:pk) , tk(:,ie) )
   else
     aakc = aak
     fkc = fk
   endif
   !--------------------------------------------------------------------

   !--------------------------------------------------------------------
   !3.1) node oriented summation of the element contributions
   ! The summation is done in two steps, exploting the operations
   ! defined for sparse matrices:
   ! a) the contributions of each element are collected into three
   !  vectors: indices and value, without doing any summation of the
   !  contributions of different elements;
   ! b) the sparse matrix is constructed and reduced: the reduction
   !  operation sums the element contributions.
   ! Notice that step b) has to be done outside the element loop.
   loc_test_do: do il=1,pkc
     loc_shape_do: do jl=1,pkc
       pos = (ie-1)*pkc**2 + (il-1)*pkc + jl

       mmmi(pos) = udofs%dofs(il,ie)
       mmmj(pos) = udofs%dofs(jl,ie)
       mmmx(pos) = aakc(il,jl)
     enddo loc_shape_do
     fff(udofs%dofs(il,ie)) = fff(udofs%dofs(il,ie)) + fkc(il)
   enddo loc_test_do
   !--------------------------------------------------------------------

 enddo elem_loop

 deallocate(  aak ,  fk , &
             aakc , fkc , &
               aak22i )
 t1 = my_second()
 write(message(1),elapsed_format) &
 'Completed computation of the local matrices: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !3.2) complete the node oriented summation of the element contributions
 ! notice that indexes in sparse matrices start from 0
 t0 = my_second()

 mmm = tri2col(new_tri(ndofsc,ndofsc,mmmi-1,mmmj-1,mmmx))
 deallocate( mmmi , mmmj , mmmx )

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed assembling of the linear system: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Partition mmm and the rhs according to the Dirichlet bcs. We
 ! exploit here the fact that the nodes eliminated in the static
 ! condensations are always the last ref_nodes(grid%d)%nn. In terms of
 ! dofs_dir and dofs_nat, there are no changes in dofs_dir and the
 ! last ref_nodes(grid%d)%nn nodes in dofs_nat are eliminated.
 t0 = my_second()
 ! count the Dirichlet dofs
 ndofs_dir = size(udofs%dir_dofs,2)
 ndofs_nat = ndofsc - ndofs_dir ! remaining ones
 allocate(     uuu1(ndofs_nat) ,     uuu2(ndofs_dir) )
 allocate( dofs_nat(ndofs_nat) , dofs_dir(ndofs_dir) )
 dofs_nat = udofs%nat_dofs(1:ndofs_nat) - 1 ! drop the last ones
 dofs_dir = udofs%dir_dofs(1,:) - 1         ! all
 ! compute uuu2 (Dirichlet datum)
 do i=1,size(udofs%idir_dofs,2)
   ! If there is at least one dof on the i-th boundary region
   if(udofs%idir_dofs(2,i).ge.udofs%idir_dofs(1,i)) &
    uuu2(udofs%idir_dofs(1,i):udofs%idir_dofs(2,i)) = coeff_dir(       &
udofs%x(:,udofs%dir_dofs(1,udofs%idir_dofs(1,i):udofs%idir_dofs(2,i))),&
                       udofs%dir_dofs(3,udofs%idir_dofs(1,i))   )
 enddo

 !         row             column
 mmm11 = dofs_nat * (mmm * dofs_nat)
 mmm12 = dofs_nat * (mmm * dofs_dir)
 mmm21 = dofs_dir * (mmm * dofs_nat)
 mmm22 = dofs_dir * (mmm * dofs_dir)

 allocate(fff1(ndofs_nat))
 fff1 = fff(dofs_nat+1)

 t1 = my_second()
 write(message(1),elapsed_format) &
 'Completed partitioning of the linear system: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Solution of the linear system
 t0 = my_second()

 allocate(rhs(mmm11%n))
 rhs = fff1 - matmul(mmm12,uuu2)

 ! define the local to global map
 allocate( gdofs_nat(ndofs_nat) )
 gdofs_nat = ddc_udofs%gnat_dofs(1:ndofs_nat) - 1

 call factor(mmm11 , gn,gdofs_nat,mpi_comm_world)
 call solve (mmm11,uuu1,rhs , 0.0_wp*uuu1,mpi_comm_world)
 call clean ()

 deallocate( rhs , gdofs_nat )

 ! boundary terms
 allocate(fff2(mmm22%n))
 fff2 = matmul(mmm21,uuu1) + matmul(mmm22,uuu2)

 ! pack back the values
 allocate( uuu(udofs%ndofs) )
 uuu(dofs_nat+1) = uuu1
 uuu(dofs_dir+1) = uuu2
 fff(dofs_dir+1) = fff2

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed solution of the linear system: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Clear the variables used in the solution of the linear system
 nnz_mmm11 = mmm11%nz
 call clear( mmm11 )
 call clear( mmm12 )
 call clear( mmm21 )
 call clear( mmm22 )
 deallocate( fff1 , fff2 )
 deallocate( uuu1 , uuu2 )
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Postprocessings: complete uuu
 if(stat_cnd) then
   pk = base%pk
   pkc = pk - ref_nodes(grid%d)%nn ! loc. dofs after static cnd.
   allocate(uuuc(pkc))
   do ie=1,grid%ne
     uuuc = uuu(udofs%dofs(1:pkc,ie))
     uuu(udofs%dofs(pkc+1:pk,ie)) = tk(:,ie) - matmul(uuk(:,:,ie),uuuc)
   enddo
   deallocate(uuuc)
 endif
 deallocate( uuk , tk )
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Error computation
 t0 = my_second()

 ! In order to re-use as much as possible the error computation
 ! subroutines written for the DG case, we reshape the numerical
 ! solution in terms of "discontinuous" functions, by formally
 ! assigning different degrees of freedom to each elements. Of course,
 ! these additional degrees of freedom are indeed copies of the CG
 ! dofs. This is done outside the error computations since it also
 ! helps plotting the results.
 allocate(uuu_dg(grid%ne*base%pk))
 do ie=1,grid%ne
   uuu_dg( (ie-1)*base%pk+1 : ie*base%pk ) = uuu(udofs%dofs(:,ie))
 enddo
 errnorms: if(compute_error_norms) then
   call mod_error_norms_constructor()

   call primal_err(grid,base,uuu_dg,coeff_lam,err_extra_deg, e_u_l2 )

   call primal_flux_err(grid,base,uuu_dg,coeff_q,coeff_diff,coeff_adv, &
                        err_extra_deg, e_flux_l2 )

   call mod_error_norms_destructor()
 endif errnorms

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed error computations: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Write the results
 call new_file_unit(fu,ierr)
 out_file_results_name = trim(basename)//out_file_results_suff
 open(fu,file=trim(out_file_results_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
   ! preamble
   call write_octave(test_name       ,'test_name'       ,fu)
   call write_octave(test_description,'test_description',fu)
   call write_octave(nnz_mmm11       ,'nnz_mmm11'       ,fu)
   ! data
   if(write_sys) then
     call write_octave(dofs_nat,'c','dofs_nat',fu)
     call write_octave(dofs_dir,'c','dofs_dir',fu)
     call write_octave(mmm,'mmm',fu)
     call write_octave(fff,'c','fff',fu)
   endif
   call write_octave(uuu,'c','uuu',fu)
   call write_octave(uuu_dg,'c','uuu_dg',fu)
   if(compute_error_norms) then
     call write_octave(e_u_l2,'e_u_l2',fu)
     call write_octave(e_flux_l2,'e_flux_l2',fu)
   endif
 close(fu,iostat=ierr)
 deallocate( uuu_dg )
 !----------------------------------------------------------------------

 !----------------------------------------------------------------------
 ! Cleanup
 deallocate( dofs_nat , dofs_dir , fff , uuu )
 call clear( mmm )
 call clear( base ); deallocate( ref_nodes )
 call clear( udofs ); call clear( ddc_udofs )

 call mod_cg_locmat_destructor()

 call mod_testcases_destructor()

 call mod_cgdofs_destructor()

 call mod_fe_spaces_destructor()

 call mod_base_destructor()

 call mod_numquad_destructor()

 call clear( bcs )
 call mod_bcs_destructor()

 call clear( grid ); call clear( ddc_grid )
 call mod_grid_destructor()

 call mod_master_el_destructor()

 call mod_output_control_destructor()

 call mod_linsolver_destructor()

 call mod_octave_io_sparse_destructor()
 call mod_sparse_destructor()

 call mod_octave_io_perms_destructor()
 call mod_perms_destructor()

 call mod_linal_destructor()

 call mod_octave_io_sympoly_destructor()
 call mod_sympoly_destructor()

 call mod_octave_io_destructor()

 call mod_fu_manager_destructor()

 call mod_mpi_utils_destructor()
 call mpi_finalize(ierr)

 call mod_kinds_destructor()

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Done: elapsed time ',t1-t00,'s.'
 call info(this_prog_name,'',message(1))

 !$ if(detailed_timing_omp) call mod_omp_utils_destructor()

 call mod_messages_destructor()
 !----------------------------------------------------------------------

end program cg_main

